{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import csv\n",
    "import json\n",
    "import random\n",
    "import jsonlines\n",
    "import time\n",
    "import pandas as pd\n",
    "import geopy\n",
    "import geopy.distance\n",
    "import re"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "def question11_gen(data, num_questions_per_template, questions, question_id):\n",
    "    # What is the total price at least if you want to stay at {NAME} with id {id} for {number} nights?\n",
    "    target = question_id + num_questions_per_template\n",
    "    while question_id < target:\n",
    "        random_indices = random.randint(0, len(data) - 1)\n",
    "        row = data.iloc[random_indices]\n",
    "        Name = row[\"NAME\"]\n",
    "        id = row[\"id\"]\n",
    "        neighbourhood = row[\"neighbourhood\"]\n",
    "        number = random.randint(1, 20)\n",
    "        least_days = row[\"minimum nights\"]\n",
    "        question = \"What is the total price at least if you want to stay at {} in {} for {} nights?\".format(Name, neighbourhood, number)\n",
    "        if type(row[\"price\"]) == str:\n",
    "            price1 = float(re.sub(\",\",\"\",row[\"price\"][1:]))\n",
    "        else:\n",
    "            price1 = row[\"price\"]\n",
    "        if type(row[\"service fee\"]) == str:\n",
    "            price2 = float(re.sub(\",\",\"\",row[\"service fee\"][1:]))\n",
    "        else:\n",
    "            price2 = row[\"service fee\"]\n",
    "        if number < least_days:\n",
    "            answer = \"$ {:.1f}\".format((price1 + price2) * least_days)\n",
    "        else:\n",
    "            answer = \"$ {:.1f}\".format((price1 + price2) * number)\n",
    "        questions.append({\"qid\": \"hard-airbnb-{:0>4d}\".format(question_id), \"question\":question, \"answer\":answer})\n",
    "        question_id += 1\n",
    "    return questions, question_id"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "def question12_gen(data, num_questions_per_template, questions, question_id):\n",
    "    # How many airbnbs are there in {neighbourhood}?\n",
    "    target = question_id + num_questions_per_template\n",
    "    neighbourhood_list = data[\"neighbourhood\"].unique()\n",
    "    while question_id < target:\n",
    "        neighbourhood = random.choice(neighbourhood_list)\n",
    "        question = \"How many airbnbs are there in {}?\".format(neighbourhood)\n",
    "        answer = str(len(data[data[\"neighbourhood\"] == neighbourhood]))\n",
    "        questions.append({\"qid\": \"hard-airbnb-{:0>4d}\".format(question_id), \"question\":question, \"answer\":answer})\n",
    "        question_id += 1\n",
    "    return questions, question_id"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def question13_gen(data, num_questions_per_template, questions, question_id):\n",
    "    # What is the average price of airbnbs in {neighbourhood}?\n",
    "    target = question_id + num_questions_per_template\n",
    "    neighbourhood_list = data[\"neighbourhood\"].unique()\n",
    "    while question_id < target:\n",
    "        neighbourhood = random.choice(neighbourhood_list)\n",
    "        sub_data = data[data[\"neighbourhood\"] == neighbourhood]\n",
    "        average_price = 0\n",
    "        for row_id in range(len(sub_data)):\n",
    "            content = sub_data.iloc[row_id]\n",
    "            if type(content[\"price\"]) == str:\n",
    "                average_price += float(re.sub(\",\", \"\", content[\"price\"][1:]))\n",
    "            else:\n",
    "                average_price += content[\"price\"]\n",
    "        if len(sub_data) != 0:\n",
    "            average_price = average_price / len(sub_data)\n",
    "            question = \"What is the average price of airbnbs in {}?\".format(neighbourhood)\n",
    "            answer = \"$ {:.1f}\".format(average_price)\n",
    "            questions.append({\"qid\": \"hard-airbnb-{:0>4d}\".format(question_id), \"question\":question, \"answer\":answer})\n",
    "            question_id += 1\n",
    "    return questions, question_id"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "def question14_gen(data, num_questions_per_template, questions, question_id):\n",
    "    # What is the average review rates within 5 miles from {NAME} with id {id}?\n",
    "    target = question_id + num_questions_per_template\n",
    "    while question_id < target:\n",
    "        random_indices = random.randint(0, len(data) - 1)\n",
    "        row = data.iloc[random_indices]\n",
    "        name = row[\"NAME\"]\n",
    "        id = row[\"id\"]\n",
    "        latitude = row[\"lat\"]\n",
    "        longitude = row[\"long\"]\n",
    "        neighbourhood = row[\"neighbourhood\"]\n",
    "        _, lo_max, _ = geopy.distance.distance(kilometers=5).destination(point=(latitude, longitude), bearing=90)\n",
    "        _, lo_min, _ = geopy.distance.distance(kilometers=5).destination(point=(latitude, longitude), bearing=270)\n",
    "        la_max, _, _ = geopy.distance.distance(kilometers=5).destination(point=(latitude, longitude), bearing=0)\n",
    "        la_min, _, _ = geopy.distance.distance(kilometers=5).destination(point=(latitude, longitude), bearing=180)\n",
    "        sub_data = data[(data[\"lat\"] <= la_max) & (data[\"lat\"] >= la_min) & (data[\"long\"] <= lo_max) & (data[\"long\"] >= lo_min)]\n",
    "        if len(sub_data) != 0:\n",
    "            question = \"What is the average review rates within 5 miles from {} in {}?\".format(name, neighbourhood)\n",
    "            answer = round(sub_data[\"review rate number\"].mean(), 2)\n",
    "            questions.append({\"qid\": \"hard-yelp-{:0>4d}\".format(question_id), \"question\": question, \"answer\": answer})\n",
    "            question_id += 1\n",
    "    return questions, question_id"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "def question15_gen(data, num_questions_per_template, questions, question_id):\n",
    "    # How much proporion of airbnbs in {neighbourhood} have a flexible cancellation policy?\n",
    "    target = question_id + num_questions_per_template\n",
    "    neighbourhood_list = data[\"neighbourhood\"].unique()\n",
    "    while question_id < target:\n",
    "        neighbourhood = random.choice(neighbourhood_list)\n",
    "        sub_data = data[data[\"neighbourhood\"] == neighbourhood]\n",
    "        sub_data = sub_data[sub_data[\"cancellation_policy\"] == \"flexible\"]\n",
    "        question = \"How much proporion of airbnbs in {} have a flexible cancellation policy?\".format(neighbourhood)\n",
    "        if len(data[data[\"neighbourhood\"] == neighbourhood]) != 0:\n",
    "            answer = str(round(len(sub_data) / len(data[data[\"neighbourhood\"] == neighbourhood]), 2))\n",
    "            questions.append({\"qid\": \"hard-airbnb-{:0>4d}\".format(question_id), \"question\":question, \"answer\":answer})\n",
    "            question_id += 1\n",
    "    return questions, question_id"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "def question16_gen(data, num_questions_per_template, questions, question_id):\n",
    "    # How much does it cost per night to stay at the most expensive entire home/apt in {neighbourhood}?\n",
    "    target = question_id + num_questions_per_template\n",
    "    neighbourhood_list = data[\"neighbourhood\"].unique()\n",
    "    while question_id < target:\n",
    "        neighbourhood = random.choice(neighbourhood_list)\n",
    "        sub_data = data[data[\"neighbourhood\"] == neighbourhood]\n",
    "        sub_data = sub_data[sub_data[\"room type\"] == \"Entire home/apt\"]\n",
    "        sub_data = sub_data.sort_values(by=[\"price\"], ascending=False)\n",
    "        question = \"How much does it cost per night to stay at the most expensive entire home/apt in {}?\".format(neighbourhood)\n",
    "        if len(sub_data) != 0:\n",
    "            if type(sub_data.iloc[0][\"price\"]) == str:\n",
    "                answer = sub_data.iloc[0][\"price\"]\n",
    "            else:\n",
    "                answer = \"$ {}\".format(sub_data.iloc[0][\"price\"])\n",
    "            questions.append({\"qid\": \"hard-airbnb-{:0>4d}\".format(question_id), \"question\":question, \"answer\":answer})\n",
    "            question_id += 1\n",
    "    return questions, question_id"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "def question17_gen(data, num_questions_per_template, questions, question_id):\n",
    "    # How many airbnbs are there in {neighbourhood} that have a review rate higher than 4?\n",
    "    target = question_id + num_questions_per_template\n",
    "    neighbourhood_list = data[\"neighbourhood\"].unique()\n",
    "    while question_id < target:\n",
    "        neighbourhood = random.choice(neighbourhood_list)\n",
    "        sub_data = data[data[\"neighbourhood\"] == neighbourhood]\n",
    "        sub_data = sub_data[sub_data[\"review rate number\"] >= 4]\n",
    "        question = \"How many airbnbs are there in {} that have a review rate higher than 4?\".format(neighbourhood)\n",
    "        answer = str(len(sub_data))\n",
    "        questions.append({\"qid\": \"hard-airbnb-{:0>4d}\".format(question_id), \"question\":question, \"answer\":answer})\n",
    "        question_id += 1\n",
    "    return questions, question_id"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "def question18_gen(data, num_questions_per_template, questions, question_id):\n",
    "    # Can you recommend me a hotel room with the lowest price in {neighbourhood}?\n",
    "    target = question_id + num_questions_per_template\n",
    "    neighbourhood_list = data[\"neighbourhood\"].unique()\n",
    "    while question_id < target:\n",
    "        neighbourhood = random.choice(neighbourhood_list)\n",
    "        sub_data = data[data[\"neighbourhood\"] == neighbourhood]\n",
    "        sub_data = sub_data.sort_values(by=[\"price\"], ascending=True)\n",
    "        question = \"Can you recommend me a hotel room with the lowest price in {}?\".format(neighbourhood)\n",
    "        answer = sub_data.iloc[0][\"NAME\"]\n",
    "        questions.append({\"qid\": \"hard-airbnb-{:0>4d}\".format(question_id), \"question\":question, \"answer\":answer})\n",
    "        question_id += 1\n",
    "    return questions, question_id"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "def question19_gen(data, num_questions_per_template, questions, question_id):\n",
    "    # Can you recommend me a private room with the highest reviews per month that can host at least 2 people in  {neighbourhood}?\n",
    "    target = question_id + num_questions_per_template\n",
    "    neighbourhood_list = data[\"neighbourhood\"].unique()\n",
    "    while question_id < target:\n",
    "        neighbourhood = random.choice(neighbourhood_list)\n",
    "        sub_data = data[data[\"neighbourhood\"] == neighbourhood]\n",
    "        sub_data = sub_data[sub_data[\"room type\"] == \"Private room\"]\n",
    "        sub_data = sub_data[sub_data[\"calculated host listings count\"] >= 2]\n",
    "        sub_data = sub_data.sort_values(by=[\"reviews per month\"], ascending=False)\n",
    "        if len(sub_data) != 0:\n",
    "            question = \"Can you recommend me a private room with the highest review rate that can host at least 2 people in {}?\".format(neighbourhood)\n",
    "            answer = sub_data.iloc[0][\"NAME\"]\n",
    "            questions.append({\"qid\": \"hard-airbnb-{:0>4d}\".format(question_id), \"question\":question, \"answer\":answer})\n",
    "            question_id += 1\n",
    "    return questions, question_id"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "def question20_gen(data, num_questions_per_template, questions, question_id):\n",
    "    # Can you recommend a shared room with the lowest price within 5 miles from {longitude} longitude and {latitude} latitude?\n",
    "    target = question_id + num_questions_per_template\n",
    "    while question_id < target:\n",
    "        longitude = random.uniform(-74.2589, -73.7004)\n",
    "        latitude = random.uniform(40.4774, 40.9176)\n",
    "        sub_data = data[(data[\"long\"] - longitude)**2 + (data[\"lat\"] - latitude)**2 <= 0.0169]\n",
    "        sub_data = sub_data[sub_data[\"room type\"] == \"Shared room\"]\n",
    "        sub_data = sub_data.sort_values(by=[\"price\"], ascending=True)\n",
    "        if len(sub_data) != 0:\n",
    "            question = \"Can you recommend a shared room with the lowest price within 10 miles from {} longitude and {} latitude?\".format(longitude, latitude)\n",
    "            answer = sub_data.iloc[0][\"NAME\"]\n",
    "            questions.append({\"qid\": \"hard-airbnb-{:0>4d}\".format(question_id), \"question\":question, \"answer\":answer})\n",
    "            question_id += 1\n",
    "    return questions, question_id"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_128621/329846299.py:3: DtypeWarning: Columns (25) have mixed types. Specify dtype option on import or set low_memory=False.\n",
      "  data = pd.read_csv(file_path)\n"
     ]
    }
   ],
   "source": [
    "start_time = time.time()\n",
    "file_path = \"/<YOUR_OWN_PATH>/ToolQA/data/raw_data/airbnb/Airbnb_Open_Data.csv\"\n",
    "data = pd.read_csv(file_path)\n",
    "\n",
    "num_questions_per_template = 10\n",
    "question_id = 0\n",
    "questions = []"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[{'qid': 'hard-airbnb-0000', 'question': 'What is the total price at least if you want to stay at Bright, modern room with panoramic window in Maspeth for 8 nights?', 'answer': '$ 42450.0'}, {'qid': 'hard-airbnb-0001', 'question': 'What is the total price at least if you want to stay at Quiet and spacious room for 2 in Maspeth for 4 nights?', 'answer': '$ 3848.0'}, {'qid': 'hard-airbnb-0002', 'question': 'What is the total price at least if you want to stay at Spacious Lovely 1B1B in UP Manhattan 4Min to Train in Washington Heights for 19 nights?', 'answer': '$ nan'}, {'qid': 'hard-airbnb-0003', 'question': 'What is the total price at least if you want to stay at Beautiful loft in DUMBO Brooklyn in Vinegar Hill for 7 nights?', 'answer': '$ 3661.0'}, {'qid': 'hard-airbnb-0004', 'question': 'What is the total price at least if you want to stay at Ditmas Park Beauty in Flatbush for 2 nights?', 'answer': '$ 3498.0'}, {'qid': 'hard-airbnb-0005', 'question': 'What is the total price at least if you want to stay at Peaceful, cozy studio in Chinatown in Chinatown for 9 nights?', 'answer': '$ 13820.0'}, {'qid': 'hard-airbnb-0006', 'question': 'What is the total price at least if you want to stay at Home away from home in Canarsie for 2 nights?', 'answer': '$ 488.0'}, {'qid': 'hard-airbnb-0007', 'question': 'What is the total price at least if you want to stay at An East Williamsburg Delight in Bedford-Stuyvesant for 12 nights?', 'answer': '$ 6552.0'}, {'qid': 'hard-airbnb-0008', 'question': 'What is the total price at least if you want to stay at Queen Sized Bdrm in Historic Harlem (Townhouse) in Harlem for 20 nights?', 'answer': '$ 8020.0'}, {'qid': 'hard-airbnb-0009', 'question': \"What is the total price at least if you want to stay at Cozy and Cute 1 bdrm in perfect Midtown location in Hell's Kitchen for 8 nights?\", 'answer': '$ 12360.0'}]\n",
      "Time elapsed for Question 11: 0.44878292083740234 seconds\n"
     ]
    }
   ],
   "source": [
    "# question template 11\n",
    "questions, question_id = question11_gen(data, num_questions_per_template, questions, question_id)\n",
    "end_time = time.time()\n",
    "print(questions[-10:])\n",
    "print(\"Time elapsed for Question 11: {} seconds\".format(end_time - start_time))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[{'qid': 'hard-airbnb-0010', 'question': 'How many airbnbs are there in Rosedale?', 'answer': '171'}, {'qid': 'hard-airbnb-0011', 'question': 'How many airbnbs are there in Flatlands?', 'answer': '217'}, {'qid': 'hard-airbnb-0012', 'question': 'How many airbnbs are there in Brighton Beach?', 'answer': '168'}, {'qid': 'hard-airbnb-0013', 'question': 'How many airbnbs are there in Whitestone?', 'answer': '26'}, {'qid': 'hard-airbnb-0014', 'question': 'How many airbnbs are there in Jamaica Estates?', 'answer': '50'}, {'qid': 'hard-airbnb-0015', 'question': 'How many airbnbs are there in Windsor Terrace?', 'answer': '331'}, {'qid': 'hard-airbnb-0016', 'question': 'How many airbnbs are there in Port Richmond?', 'answer': '25'}, {'qid': 'hard-airbnb-0017', 'question': 'How many airbnbs are there in Bronxdale?', 'answer': '48'}, {'qid': 'hard-airbnb-0018', 'question': 'How many airbnbs are there in Roosevelt Island?', 'answer': '159'}, {'qid': 'hard-airbnb-0019', 'question': 'How many airbnbs are there in Van Nest?', 'answer': '26'}]\n",
      "Time elapsed for Question 12: 0.5159854888916016 seconds\n"
     ]
    }
   ],
   "source": [
    "# question template 12\n",
    "questions, question_id = question12_gen(data, num_questions_per_template, questions, question_id)\n",
    "end_time = time.time()\n",
    "print(questions[-10:])\n",
    "print(\"Time elapsed for Question 12: {} seconds\".format(end_time - start_time))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[{'qid': 'hard-airbnb-0020', 'question': 'What is the average price of airbnbs in Boerum Hill?', 'answer': '$ 627.0'}, {'qid': 'hard-airbnb-0021', 'question': 'What is the average price of airbnbs in Woodside?', 'answer': '$ nan'}, {'qid': 'hard-airbnb-0022', 'question': 'What is the average price of airbnbs in Upper East Side?', 'answer': '$ nan'}, {'qid': 'hard-airbnb-0023', 'question': 'What is the average price of airbnbs in Financial District?', 'answer': '$ nan'}, {'qid': 'hard-airbnb-0024', 'question': 'What is the average price of airbnbs in Grant City?', 'answer': '$ 558.2'}, {'qid': 'hard-airbnb-0025', 'question': 'What is the average price of airbnbs in East Morrisania?', 'answer': '$ 764.7'}, {'qid': 'hard-airbnb-0026', 'question': 'What is the average price of airbnbs in Howland Hook?', 'answer': '$ 657.3'}, {'qid': 'hard-airbnb-0027', 'question': 'What is the average price of airbnbs in Glen Oaks?', 'answer': '$ 550.5'}, {'qid': 'hard-airbnb-0028', 'question': 'What is the average price of airbnbs in East Harlem?', 'answer': '$ nan'}, {'qid': 'hard-airbnb-0029', 'question': 'What is the average price of airbnbs in Stuyvesant Town?', 'answer': '$ 680.9'}]\n",
      "Time elapsed for Question 13: 1.2129943370819092 seconds\n"
     ]
    }
   ],
   "source": [
    "# question template 13\n",
    "questions, question_id = question13_gen(data, num_questions_per_template, questions, question_id)\n",
    "end_time = time.time()\n",
    "print(questions[-10:])\n",
    "print(\"Time elapsed for Question 13: {} seconds\".format(end_time - start_time))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[{'qid': 'hard-yelp-0030', 'question': 'What is the average review rates within 5 miles from Spacious 1 BR in Midtown East in Midtown?', 'answer': 3.27}, {'qid': 'hard-yelp-0031', 'question': 'What is the average review rates within 5 miles from 10 mins/Airports JFK/LGA/Hosp/malls bus/train#4 in Richmond Hill?', 'answer': 3.39}, {'qid': 'hard-yelp-0032', 'question': 'What is the average review rates within 5 miles from Williamsburg Home with a View in Williamsburg?', 'answer': 3.28}, {'qid': 'hard-yelp-0033', 'question': 'What is the average review rates within 5 miles from Spacious Midtown East 2 Bedroom in Midtown?', 'answer': 3.27}, {'qid': 'hard-yelp-0034', 'question': 'What is the average review rates within 5 miles from Slick Prime Lower East East ~BRAND NEW 1br Apt~ in Lower East Side?', 'answer': 3.27}, {'qid': 'hard-yelp-0035', 'question': 'What is the average review rates within 5 miles from Heart of Park Slope in Park Slope?', 'answer': 3.26}, {'qid': 'hard-yelp-0036', 'question': 'What is the average review rates within 5 miles from Charming Mini Loft/ Lower East Side in Lower East Side?', 'answer': 3.27}, {'qid': 'hard-yelp-0037', 'question': 'What is the average review rates within 5 miles from 3 Bedrooms, 1000 Pvt Sq ft in Historic Brownstone. in Harlem?', 'answer': 3.26}, {'qid': 'hard-yelp-0038', 'question': 'What is the average review rates within 5 miles from NEWLY RENOVATED,  studio in Park Slope in Park Slope?', 'answer': 3.26}, {'qid': 'hard-yelp-0039', 'question': 'What is the average review rates within 5 miles from Wonderful apartment with view & Rooftop!!!! in Astoria?', 'answer': 3.28}]\n",
      "Time elapsed for Question 14: 1.3120503425598145 seconds\n"
     ]
    }
   ],
   "source": [
    "# question template 14\n",
    "questions, question_id = question14_gen(data, num_questions_per_template, questions, question_id)\n",
    "end_time = time.time()\n",
    "print(questions[-10:])\n",
    "print(\"Time elapsed for Question 14: {} seconds\".format(end_time - start_time))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[{'qid': 'hard-airbnb-0040', 'question': 'How much proporion of airbnbs in West Brighton have a flexible cancellation policy?', 'answer': '0.31'}, {'qid': 'hard-airbnb-0041', 'question': 'How much proporion of airbnbs in Fordham have a flexible cancellation policy?', 'answer': '0.31'}, {'qid': 'hard-airbnb-0042', 'question': 'How much proporion of airbnbs in Ditmars Steinway have a flexible cancellation policy?', 'answer': '0.3'}, {'qid': 'hard-airbnb-0043', 'question': 'How much proporion of airbnbs in Co-op City have a flexible cancellation policy?', 'answer': '0.2'}, {'qid': 'hard-airbnb-0044', 'question': 'How much proporion of airbnbs in SoHo have a flexible cancellation policy?', 'answer': '0.34'}, {'qid': 'hard-airbnb-0045', 'question': 'How much proporion of airbnbs in Wakefield have a flexible cancellation policy?', 'answer': '0.4'}, {'qid': 'hard-airbnb-0046', 'question': 'How much proporion of airbnbs in East Elmhurst have a flexible cancellation policy?', 'answer': '0.33'}, {'qid': 'hard-airbnb-0047', 'question': 'How much proporion of airbnbs in Wakefield have a flexible cancellation policy?', 'answer': '0.4'}, {'qid': 'hard-airbnb-0048', 'question': 'How much proporion of airbnbs in Howland Hook have a flexible cancellation policy?', 'answer': '0.17'}, {'qid': 'hard-airbnb-0049', 'question': 'How much proporion of airbnbs in Stuyvesant Town have a flexible cancellation policy?', 'answer': '0.33'}]\n",
      "Time elapsed for Question 15: 1.477332592010498 seconds\n"
     ]
    }
   ],
   "source": [
    "# question template 15\n",
    "questions, question_id = question15_gen(data, num_questions_per_template, questions, question_id)\n",
    "end_time = time.time()\n",
    "print(questions[-10:])\n",
    "print(\"Time elapsed for Question 15: {} seconds\".format(end_time - start_time))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[{'qid': 'hard-airbnb-0050', 'question': 'How much does it cost per night to stay at the most expensive entire home/apt in Eastchester?', 'answer': '$961 '}, {'qid': 'hard-airbnb-0051', 'question': 'How much does it cost per night to stay at the most expensive entire home/apt in Throgs Neck?', 'answer': '$984 '}, {'qid': 'hard-airbnb-0052', 'question': 'How much does it cost per night to stay at the most expensive entire home/apt in Kingsbridge?', 'answer': '$978 '}, {'qid': 'hard-airbnb-0053', 'question': 'How much does it cost per night to stay at the most expensive entire home/apt in Emerson Hill?', 'answer': '$317 '}, {'qid': 'hard-airbnb-0054', 'question': 'How much does it cost per night to stay at the most expensive entire home/apt in Williamsbridge?', 'answer': '$990 '}, {'qid': 'hard-airbnb-0055', 'question': 'How much does it cost per night to stay at the most expensive entire home/apt in Edgemere?', 'answer': '$964 '}, {'qid': 'hard-airbnb-0056', 'question': 'How much does it cost per night to stay at the most expensive entire home/apt in Morningside Heights?', 'answer': '$997 '}, {'qid': 'hard-airbnb-0057', 'question': 'How much does it cost per night to stay at the most expensive entire home/apt in Howard Beach?', 'answer': '$968 '}, {'qid': 'hard-airbnb-0058', 'question': 'How much does it cost per night to stay at the most expensive entire home/apt in Kew Gardens?', 'answer': '$931 '}, {'qid': 'hard-airbnb-0059', 'question': 'How much does it cost per night to stay at the most expensive entire home/apt in Fort Wadsworth?', 'answer': '$1,024 '}]\n",
      "Time elapsed for Question 16: 1.5480306148529053 seconds\n"
     ]
    }
   ],
   "source": [
    "# question template 16\n",
    "questions, question_id = question16_gen(data, num_questions_per_template, questions, question_id)\n",
    "end_time = time.time()\n",
    "print(questions[-10:])\n",
    "print(\"Time elapsed for Question 16: {} seconds\".format(end_time - start_time))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[{'qid': 'hard-airbnb-0060', 'question': 'How many airbnbs are there in Little Italy that have a review rate higher than 4?', 'answer': '119'}, {'qid': 'hard-airbnb-0061', 'question': 'How many airbnbs are there in Springfield Gardens that have a review rate higher than 4?', 'answer': '115'}, {'qid': 'hard-airbnb-0062', 'question': 'How many airbnbs are there in Eltingville that have a review rate higher than 4?', 'answer': '6'}, {'qid': 'hard-airbnb-0063', 'question': 'How many airbnbs are there in Belmont that have a review rate higher than 4?', 'answer': '21'}, {'qid': 'hard-airbnb-0064', 'question': 'How many airbnbs are there in Concord that have a review rate higher than 4?', 'answer': '28'}, {'qid': 'hard-airbnb-0065', 'question': 'How many airbnbs are there in Douglaston that have a review rate higher than 4?', 'answer': '10'}, {'qid': 'hard-airbnb-0066', 'question': 'How many airbnbs are there in Prospect-Lefferts Gardens that have a review rate higher than 4?', 'answer': '550'}, {'qid': 'hard-airbnb-0067', 'question': 'How many airbnbs are there in Randall Manor that have a review rate higher than 4?', 'answer': '25'}, {'qid': 'hard-airbnb-0068', 'question': 'How many airbnbs are there in Financial District that have a review rate higher than 4?', 'answer': '714'}, {'qid': 'hard-airbnb-0069', 'question': 'How many airbnbs are there in Norwood that have a review rate higher than 4?', 'answer': '32'}]\n",
      "Time elapsed for Question 17: 1.615236759185791 seconds\n"
     ]
    }
   ],
   "source": [
    "# question template 17\n",
    "questions, question_id = question17_gen(data, num_questions_per_template, questions, question_id)\n",
    "end_time = time.time()\n",
    "print(questions[-10:])\n",
    "print(\"Time elapsed for Question 17: {} seconds\".format(end_time - start_time))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[{'qid': 'hard-airbnb-0070', 'question': 'Can you recommend me a hotel room with the lowest price in College Point?', 'answer': 'Doris’s Fresh House'}, {'qid': 'hard-airbnb-0071', 'question': 'Can you recommend me a hotel room with the lowest price in SoHo?', 'answer': 'Greenwich village 1 bedroom - spacious!'}, {'qid': 'hard-airbnb-0072', 'question': 'Can you recommend me a hotel room with the lowest price in Eltingville?', 'answer': '“No Place Like Home”\\n1st Floor Suburban Apt.'}, {'qid': 'hard-airbnb-0073', 'question': 'Can you recommend me a hotel room with the lowest price in Holliswood?', 'answer': 'ROOM EN NEW YORK, MANHATTAN'}, {'qid': 'hard-airbnb-0074', 'question': 'Can you recommend me a hotel room with the lowest price in Concourse?', 'answer': 'One Bedroom Apartment  in TownHouse'}, {'qid': 'hard-airbnb-0075', 'question': 'Can you recommend me a hotel room with the lowest price in Silver Lake?', 'answer': 'New Apartment, Close to Ferry'}, {'qid': 'hard-airbnb-0076', 'question': \"Can you recommend me a hotel room with the lowest price in Bull's Head?\", 'answer': 'Newly Renovated Guest Room with Private Bath'}, {'qid': 'hard-airbnb-0077', 'question': 'Can you recommend me a hotel room with the lowest price in Westerleigh?', 'answer': 'Eclectic Private New Apartment  Westerleigh, S.I.'}, {'qid': 'hard-airbnb-0078', 'question': 'Can you recommend me a hotel room with the lowest price in NoHo?', 'answer': 'Clean and Stylish Noho Loft w/ Comfy King Bed'}, {'qid': 'hard-airbnb-0079', 'question': 'Can you recommend me a hotel room with the lowest price in Clifton?', 'answer': 'Cozy Bedroom & Private Bath/Full use of Amenities'}]\n",
      "Time elapsed for Question 18: 1.6814179420471191 seconds\n"
     ]
    }
   ],
   "source": [
    "# question template 18\n",
    "questions, question_id = question18_gen(data, num_questions_per_template, questions, question_id)\n",
    "end_time = time.time()\n",
    "print(questions[-10:])\n",
    "print(\"Time elapsed for Question 18: {} seconds\".format(end_time - start_time))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[{'qid': 'hard-airbnb-0080', 'question': 'Can you recommend me a private room with the highest review rate that can host at least 2 people in College Point?', 'answer': 'Beautiful and Clean Private Bedroom with Bathroom'}, {'qid': 'hard-airbnb-0081', 'question': 'Can you recommend me a private room with the highest review rate that can host at least 2 people in St. George?', 'answer': '@FERRY, Private Cozy Room, Renovated&Stylish.'}, {'qid': 'hard-airbnb-0082', 'question': 'Can you recommend me a private room with the highest review rate that can host at least 2 people in Flatiron District?', 'answer': 'Sonder Flatiron | Spacious King Room+'}, {'qid': 'hard-airbnb-0083', 'question': 'Can you recommend me a private room with the highest review rate that can host at least 2 people in Co-op City?', 'answer': 'A unique apartment,  clean, quiet, and peaceful !'}, {'qid': 'hard-airbnb-0084', 'question': 'Can you recommend me a private room with the highest review rate that can host at least 2 people in Howard Beach?', 'answer': 'PRIVATE, NEWLY RENOVATED BEDROOM, 10 MINS FROM JFK'}, {'qid': 'hard-airbnb-0085', 'question': 'Can you recommend me a private room with the highest review rate that can host at least 2 people in Tribeca?', 'answer': 'Classic, yet Unconventional Tribeca NYC Extra Room'}, {'qid': 'hard-airbnb-0086', 'question': 'Can you recommend me a private room with the highest review rate that can host at least 2 people in Castleton Corners?', 'answer': '2 Bedrooms across from a Beautiful Park'}, {'qid': 'hard-airbnb-0087', 'question': 'Can you recommend me a private room with the highest review rate that can host at least 2 people in Edenwald?', 'answer': 'Luxury Affordable comfort in the Bronx-Suite 3!'}, {'qid': 'hard-airbnb-0088', 'question': 'Can you recommend me a private room with the highest review rate that can host at least 2 people in Dongan Hills?', 'answer': 'comfortable & cozy qns size bed Feel it yourself'}, {'qid': 'hard-airbnb-0089', 'question': 'Can you recommend me a private room with the highest review rate that can host at least 2 people in Nolita?', 'answer': 'Great Room in Charming Nolita Apartment'}]\n",
      "Time elapsed for Question 19: 1.7769811153411865 seconds\n"
     ]
    }
   ],
   "source": [
    "# question template 19\n",
    "questions, question_id = question19_gen(data, num_questions_per_template, questions, question_id)\n",
    "end_time = time.time()\n",
    "print(questions[-10:])\n",
    "print(\"Time elapsed for Question 19: {} seconds\".format(end_time - start_time))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[{'qid': 'hard-airbnb-0090', 'question': 'Can you recommend a shared room with the lowest price within 10 miles from -74.06269614778004 longitude and 40.5813729578663 latitude?', 'answer': 'Premium Spot for female. Close to subway & Park'}, {'qid': 'hard-airbnb-0091', 'question': 'Can you recommend a shared room with the lowest price within 10 miles from -73.79998226317166 longitude and 40.581286157201006 latitude?', 'answer': 'Quiet & Elegant, 30 mins to midtown Manhattan!'}, {'qid': 'hard-airbnb-0092', 'question': 'Can you recommend a shared room with the lowest price within 10 miles from -73.70247078883878 longitude and 40.58720602526452 latitude?', 'answer': 'Home Away From Home'}, {'qid': 'hard-airbnb-0093', 'question': 'Can you recommend a shared room with the lowest price within 10 miles from -73.83109495286112 longitude and 40.835727665445795 latitude?', 'answer': 'Brand New Renovated Shared Room In  Manhattan'}, {'qid': 'hard-airbnb-0094', 'question': 'Can you recommend a shared room with the lowest price within 10 miles from -74.0426082801347 longitude and 40.689902908795524 latitude?', 'answer': 'Comfortable Futon Couch in Brooklyn'}, {'qid': 'hard-airbnb-0095', 'question': 'Can you recommend a shared room with the lowest price within 10 miles from -73.72509182668071 longitude and 40.79002903608048 latitude?', 'answer': 'Partitioned (shared) Cozy Studio in Pelham Bay'}, {'qid': 'hard-airbnb-0096', 'question': 'Can you recommend a shared room with the lowest price within 10 miles from -73.81552366025818 longitude and 40.89104376829842 latitude?', 'answer': 'Partitioned (shared) Cozy Studio in Pelham Bay'}, {'qid': 'hard-airbnb-0097', 'question': 'Can you recommend a shared room with the lowest price within 10 miles from -74.09292883460675 longitude and 40.72347282747828 latitude?', 'answer': 'Living room available! Modern apartment in midtown'}, {'qid': 'hard-airbnb-0098', 'question': 'Can you recommend a shared room with the lowest price within 10 miles from -73.91523625637042 longitude and 40.867183473498066 latitude?', 'answer': 'Brand New Renovated Shared Room In  Manhattan'}, {'qid': 'hard-airbnb-0099', 'question': 'Can you recommend a shared room with the lowest price within 10 miles from -73.89471040722765 longitude and 40.60167851935561 latitude?', 'answer': 'Comfortable Futon Couch in Brooklyn'}]\n",
      "Time elapsed for Question 20: 1.863246202468872 seconds\n"
     ]
    }
   ],
   "source": [
    "# question template 20\n",
    "questions, question_id = question20_gen(data, num_questions_per_template, questions, question_id)\n",
    "end_time = time.time()\n",
    "print(questions[-10:])\n",
    "print(\"Time elapsed for Question 20: {} seconds\".format(end_time - start_time))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "with jsonlines.open('/<YOUR_OWN_PATH>/ToolQA/data/questions/hard/airbnb-hard.jsonl', mode='w') as writer:\n",
    "    for row in questions:\n",
    "        writer.write(row)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "llm",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.16"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
